# REPLICATION
# Place-Based Campaigning: The Political Impact of Real Grassroots Mobilization
# Daniel Bischof and Thomas Kurer
# Journal of Politics

# Create Main Data Set

# load municipality-level data

# Read the event data and flag events whose free-text description mentions
# the referendum, Renzi, or the constitutional reform.
m5s <- read.csv("./../../data_coded/events_groups_comune.csv", stringsAsFactors = FALSE)

# 1 if `keyword` occurs anywhere in `text` (case-insensitive), 0 otherwise
flag_keyword <- function(text, keyword) {
  ifelse(grepl(keyword, text, ignore.case = TRUE), 1, 0)
}

m5s$mention_ref <- flag_keyword(m5s$event_description, "referendum")
m5s$mention_renzi <- flag_keyword(m5s$event_description, "renzi")
m5s$mention_cons <- flag_keyword(m5s$event_description, "costituzionale")

# referendum event: description mentions the referendum or the constitution
m5s$refevent <- ifelse(m5s$mention_ref == 1 | m5s$mention_cons == 1, 1, 0)
# renzi event: description mentions Renzi
m5s$renzievent <- ifelse(m5s$mention_renzi == 1, 1, 0)

# # daily events plots
# 
# 
# daily <- m5s %>% select(local_date, mention_ref, mention_cons)
# daily$one <- 1
# daily <- daily %>% group_by(local_date) %>% summarize(count_total=sum(one), count_mention=sum(mention_ref), count_share=count_mention/count_total)
# 
# ggplot(daily, (aes(x=as.Date(local_date), y=count_mention))) + 
#   geom_bar(stat="identity") + 
#     geom_vline(xintercept = as.numeric(as.Date("2016-12-06")), color="red", linetype="dashed") +
#   scale_x_date(date_breaks = "3 month", date_labels =  "%b %Y") + 
#   theme(axis.text.x=element_text(angle=60, hjust=1)) +
#   xlab("Date (Daily Information)") + ylab("Number of Events on Referendum")
# ggsave(paste0(datpath, "daily_referendum_events_abs.pdf"), width=12)
# 
# ggplot(daily, (aes(x=as.Date(local_date), y=count_share))) + 
#   geom_bar(stat="identity") + 
#     geom_vline(xintercept = as.numeric(as.Date("2016-12-06")), color="red", linetype="dashed") +
#   scale_x_date(date_breaks = "3 month", date_labels =  "%b %Y") + 
#   theme(axis.text.x=element_text(angle=60, hjust=1)) +
#   xlab("Date (Daily Information)") + ylab("Share of Events on Referendum")
# ggsave(paste0(datpath, "daily_referendum_events_share.pdf"), width=12)
# 
# # loess
# 
# ggplot(daily, (aes(x=as.Date(local_date), y=count_mention))) + 
#   geom_point(alpha=0.2, position="jitter") + geom_smooth(method="loess", span=0.2) +
#   geom_vline(xintercept = as.numeric(as.Date("2016-12-06")), color="red", linetype="dashed") +
#   scale_x_date(date_breaks = "3 month", date_labels =  "%b %Y") + 
#   theme(axis.text.x=element_text(angle=60, hjust=1)) +
#   xlab("Date (Daily Information)") + ylab("Number of Events on Referendum")
# ggsave(paste0(datpath, "daily_referendum_events_abs_loess.pdf"), width=12)

# loc_outdoor/loc_indoor classification
#
# Venue names are normalised (accents replaced, lower-cased, trimmed) and then
# matched against two keyword lists. Later rules override earlier ones, so the
# order of the steps below matters.

venue_ascii <- chartr("óòèéàãù", "ooeeaau", m5s$venue_name)
m5s$location <- trimws(tolower(venue_ascii))

# keywords indicating an outdoor venue
loc_outdoorstring <- c(
  "parco", "parcheggio", "p.za", "p.zza ", "pzza", "piazza", "piazzetta",
  "piazzale", "piazze", "centro", "in centro", "giardini", "giardinetti",
  "banchetto", "spazio", "centro storico", "rotonda", "punto informativo",
  "luogo di incontro", "punto di incontro", "infopoint", "info point",
  "meeting point", "meetingpoint", "pubblico", "lungolago", "lungomare",
  "porta di mare", "lido", "bagni", "bagno", "porto", "marina", "marinai",
  "passeggiata", "statua", "bosco", "arco", "archi", "portico", "portici",
  "piazetta", "centro commerciale", "zona", "zona pedonale", "marciapiede",
  "area", "mercato", "mercatino", "galleria", "gazebo", "5 stelle", "torre",
  "corso", "angolo", "stazione", "fermata", "ingresso", "difronte", "fronte",
  "davanti", "vicino", "presso le", "presso il", "presso la", "sotto le",
  "sotto la", "sotto il"
)

# keywords indicating an indoor venue
loc_indoorstring <- c(
  "casa di", "casa", "c/o", "casetta", "privata", "privato", "sede",
  "1° piano", "2° piano", "3° piano", "1 ° piano", "2 ° piano", "3 ° piano",
  "atelier", "locanda", "club", "spiaggia", "hotel", "albergo", "bar", "pub",
  "ostello", "caffettone", "chiosco", "ristorante", "mangeria", "trattoria",
  "restaurant", "taverna", "ostaria", "osteria", "pizzeria", "gelateria",
  "birreria", "focacceria", "museo", "associazione", "cooperativa",
  "centro civico", "centro socio", "centro sociale", "che'ntro sociale",
  "centro culturale", "centro polivalente", "centro servizi", "parrocchia",
  "parrocchiale", "studio", "consiglio", "centro polifunzionale", "villa",
  "anfiteatro", "chiesa", "negozio", "caffe", "cafe", "libreria",
  "biblioteca", "salone", "coffee", "caffetteria", "pasticceria", "sala",
  "saletta", "agriturismo", "scuola", "scuole", "universita", "istituto",
  "auditorium", "palazzo", "palazetto", "ufficio", "uffici", "officine",
  "teatro", "aula", "cinema", "comunale", "centro pastorale"
)

# collapse a keyword list into a single alternation enclosed in \b ... \b
as_word_regex <- function(keywords) {
  str_c("\\b(", str_c(keywords, collapse = "|"), ")\\b")
}

# first pass: plain keyword hits (NA location stays NA)
m5s$loc_outdoor <- ifelse(str_detect(m5s$location, as_word_regex(loc_outdoorstring)), 1, 0)
m5s$loc_indoor <- ifelse(str_detect(m5s$location, as_word_regex(loc_indoorstring)), 1, 0)

# both lists can hit when a bar is named after an outdoor keyword
# (e.g. "caffe corso", "bar al parco"): indoor wins,
# e.g. "centro" is overruled by "centro sociale"
indoor_rows <- which(m5s$loc_indoor == 1)
m5s$loc_outdoor[indoor_rows] <- 0

# "parcheggio" is often mentioned together with an indoor location (parking in
# front of the school) and would be falsely overruled above; "piazza" is
# treated as a decisive outdoor keyword as well. Both are plain substring
# matches and force outdoor = 1 / indoor = 0.
for (outdoor_key in c("parcheggio", "piazza")) {
  key_hit <- grepl(outdoor_key, m5s$location)
  m5s$loc_outdoor <- ifelse(key_hit, 1, m5s$loc_outdoor)
  m5s$loc_indoor <- ifelse(key_hit, 0, m5s$loc_indoor)
}

# sometimes an outdoor location is described by a nearby indoor location:
# build every "<outdoor> <connector> <indoor>" phrase
nearby_phrases <- function(connector) {
  outdoor_part <- rep(paste(loc_outdoorstring, connector), each = length(loc_indoorstring))
  paste(outdoor_part, loc_indoorstring)
}

closebyloc1 <- nearby_phrases("presso")
closebyloc2 <- nearby_phrases("di fronte al")
closebyloc3 <- nearby_phrases("antistante")

# any such phrase marks the event as outdoor and not indoor
for (phrases in list(closebyloc1, closebyloc2, closebyloc3)) {
  phrase_hit <- str_detect(m5s$location, as_word_regex(phrases))
  m5s$loc_outdoor <- ifelse(phrase_hit, 1, m5s$loc_outdoor)
  m5s$loc_indoor <- ifelse(phrase_hit, 0, m5s$loc_indoor)
}

# add undefined and missing

# undefined: a location is given but matches neither list (NA if no location)
m5s$loc_undefined <- ifelse(m5s$loc_indoor == 0 & m5s$loc_outdoor == 0, 1, 0)
m5s$loc_undefined <- ifelse(is.na(m5s$location), NA, m5s$loc_undefined)

# missing: no location recorded at all
m5s$loc_missing <- ifelse(is.na(m5s$location), 1, 0)

# create summary Table A.2
#
# One row per location category (outdoor / indoor / undefined / missing) plus
# a total row, with the keyword lists, the number of events and the share of
# events in each category.
#
# Counts are computed with sum(flag == 1) rather than table(flag)[2]: the
# table()-based lookup returns NA (or the wrong cell) whenever a flag does not
# take both values 0 and 1, which would turn the total and every percentage
# into NA.

count_flagged <- function(flag) sum(flag == 1, na.rm = TRUE)

tabA2_n_outdoor <- count_flagged(m5s$loc_outdoor)
tabA2_n_indoor <- count_flagged(m5s$loc_indoor)
tabA2_n_undefined <- count_flagged(m5s$loc_undefined)
tabA2_n_missing <- count_flagged(m5s$loc_missing)

tabA2_N_total <- sum(tabA2_n_outdoor, tabA2_n_indoor, tabA2_n_undefined, tabA2_n_missing)

# share of the total in percent, one decimal
tabA2_share <- function(n) round((n / tabA2_N_total) * 100, 1)

tabA2_1 <- c("location", "outdoor", "indoor", "undefined", "missing", "")
tabA2_2 <- c("strings",
             paste(loc_outdoorstring, collapse = ", "),
             paste(loc_indoorstring, collapse = ", "),
             "", "", "")
tabA2_3 <- c("N", tabA2_n_outdoor, tabA2_n_indoor, tabA2_n_undefined, tabA2_n_missing, tabA2_N_total)
tabA2_4 <- c("%",
             tabA2_share(tabA2_n_outdoor),
             tabA2_share(tabA2_n_indoor),
             tabA2_share(tabA2_n_undefined),
             tabA2_share(tabA2_n_missing),
             tabA2_share(tabA2_N_total))

tabA2 <- cbind.data.frame(tabA2_1, tabA2_2, tabA2_3, tabA2_4)
tabA2 <- xtable(tabA2)
print(tabA2, include.rownames = FALSE, file = "./../../results/tables/taba2_location_descriptives.tex")

# count nr of events within different time frames
#
# Builds `nr_com`, one row per comune, holding event counts for many date
# windows. `n_*` variables count events; `wn_*` variables sum
# `yes_rsvp_count` over the same events (RSVP-weighted counts).
#
# Implementation: every count is computed with a grouped mutate() (so it is
# repeated on each event row of the comune) and then only the first row per
# comune is kept. The event-level columns that survive (local_date,
# yes_rsvp_count, group_id, refevent, ..., date) therefore describe that
# first row only and are not comune-level quantities.
#
# Window bounds are written out explicitly for each variable; note that most
# windows are open on the left (date > start) and closed on the right.
# The yearly bins (n_2005 ... n_2018, wn_2005 ... wn_2018) are closed on both
# sides so that events dated 1 January are counted in their year, consistent
# with n_16 / wn_16.

nr_events <- m5s %>% dplyr::select(local_date, yes_rsvp_count, group_id, region_id, terunit_id, province_id, comune_id, comune_name, refevent, renzievent, loc_indoor, loc_outdoor, loc_undefined, loc_missing)
nr_events$one <- 1 # counter: summing `one` over a subset counts its events
nr_events$date <- as.Date(nr_events$local_date)

nr_com <- nr_events %>% 
  # 365 events without date, status=proposed. do not include, unclear if happened or not.
  filter(!is.na(date)) %>%
  
  # aggregate events on comune level
  group_by(comune_id) %>%
  
  # find first event
  mutate(first_event=min(date)) %>%
  
  # total number of events per comune
  mutate(n_total=sum(one),
         # nr of events by year(group)
         n_startto09=sum(one[date<=as.Date('2008-12-31')]),
         n_09to11=sum(one[date>=as.Date('2009-01-01') & date<=as.Date('2011-12-31')]),
         n_12to15=sum(one[date>=as.Date('2012-01-01') & date<=as.Date('2015-12-31')]),
         n_startto16=sum(one[date<=as.Date('2015-12-31')]),
         n_16=sum(one[date>=as.Date('2016-01-01') & date<=as.Date('2016-12-31')]),
         n_16toend=sum(one[date>as.Date('2015-12-31')]),

         # time spans of equal length
         # full itanes panel length
         n_treat_157d=sum(one[date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')]),
         n_pre_157d=sum(one[date>as.Date('2015-12-27') & date<as.Date('2016-06-01')]),

         # x months treat
         n_treat_24m=sum(one[date>as.Date('2014-12-04') & date<=as.Date('2016-12-04')]),
         n_treat_12m=sum(one[date>as.Date('2015-12-04') & date<=as.Date('2016-12-04')]),
         n_treat_6m=sum(one[date>as.Date('2016-06-04') & date<=as.Date('2016-12-04')]),
         n_treat_120d=sum(one[date>as.Date('2016-08-06') & date<=as.Date('2016-12-04')]),
         n_treat_90d=sum(one[date>as.Date('2016-09-05') & date<=as.Date('2016-12-04')]),
         n_treat_60d=sum(one[date>as.Date('2016-10-05') & date<=as.Date('2016-12-04')]),
         n_treat_30d=sum(one[date>as.Date('2016-11-04') & date<=as.Date('2016-12-04')]),
         # campaign duration, starting jan 20 (renzi announces resignation in case of NO)
         n_treat_campaign=sum(one[date>as.Date('2016-01-20') & date<=as.Date('2016-12-04')]),
         # intense campaign final 3 months
         n_treat_campaign_short=sum(one[date>as.Date('2016-08-31') & date<=as.Date('2016-12-04')]),
         # everything before referendum
         n_treat_referendum=sum(one[date<=as.Date('2016-12-04')]),


         # x months pre
         n_pre_120d=sum(one[date>as.Date('2016-02-01') & date<as.Date('2016-06-01')]),
         n_pre_90d=sum(one[date>as.Date('2016-03-02') & date<as.Date('2016-06-01')]),
         n_pre_60d=sum(one[date>as.Date('2016-04-01') & date<as.Date('2016-06-01')]),
         n_pre_30d=sum(one[date>as.Date('2016-05-01') & date<as.Date('2016-06-01')]),
         # pre-campaign
         n_pre_campaign=sum(one[date<as.Date('2016-01-20')]),

         # events with reference to renzi or referendum
         n_total_renzi=sum(renzievent),
         n_total_ref=sum(refevent),

         # nr of events in treatment period with reference to either referendum or renzi
         n_treat_ref=sum(refevent[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')]),
         n_treat_renzi=sum(renzievent[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')]),
         
         # events by location (location flags are NA when no venue is given, hence na.rm)
         n_total_loc_outdoor=sum(loc_outdoor, na.rm=TRUE),
         n_total_loc_indoor=sum(loc_indoor, na.rm=TRUE),
         n_total_loc_undefined=sum(loc_undefined, na.rm=TRUE),
         n_total_loc_missing=sum(loc_missing, na.rm=TRUE),

         # nr of events in treatment period by location
         n_treat_loc_outdoor=sum(loc_outdoor[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         n_treat_loc_indoor=sum(loc_indoor[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         n_treat_loc_undefined=sum(loc_undefined[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         n_treat_loc_missing=sum(loc_missing[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         
         # itanes panel length by location
         n_treat_loc_outdoor_157d=sum(loc_outdoor[date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         n_treat_loc_indoor_157d=sum(loc_indoor[date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         n_treat_loc_undefined_157d=sum(loc_undefined[date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         n_treat_loc_missing_157d=sum(loc_missing[date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         
         n_pre_loc_outdoor_157d=sum(loc_outdoor[date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),
         n_pre_loc_indoor_157d=sum(loc_indoor[date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),
         n_pre_loc_undefined_157d=sum(loc_undefined[date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),
         n_pre_loc_missing_157d=sum(loc_missing[date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),

         # post referendum
         n_posttreat=sum(one[date>as.Date('2016-12-04')]),
         
         
         # tscs data set: yearly number of events
         # (1 January included: date >= start of year)
         n_2005=sum(one[date>=as.Date('2005-01-01') & date<=as.Date('2005-12-31')]),
         n_2006=sum(one[date>=as.Date('2006-01-01') & date<=as.Date('2006-12-31')]),
         n_2007=sum(one[date>=as.Date('2007-01-01') & date<=as.Date('2007-12-31')]),
         n_2008=sum(one[date>=as.Date('2008-01-01') & date<=as.Date('2008-12-31')]),
         n_2009=sum(one[date>=as.Date('2009-01-01') & date<=as.Date('2009-12-31')]),
         n_2010=sum(one[date>=as.Date('2010-01-01') & date<=as.Date('2010-12-31')]),
         n_2011=sum(one[date>=as.Date('2011-01-01') & date<=as.Date('2011-12-31')]),
         n_2012=sum(one[date>=as.Date('2012-01-01') & date<=as.Date('2012-12-31')]),
         n_2013=sum(one[date>=as.Date('2013-01-01') & date<=as.Date('2013-12-31')]),
         n_2014=sum(one[date>=as.Date('2014-01-01') & date<=as.Date('2014-12-31')]),
         n_2015=sum(one[date>=as.Date('2015-01-01') & date<=as.Date('2015-12-31')]),
         n_2016=sum(one[date>=as.Date('2016-01-01') & date<=as.Date('2016-12-31')]),
         n_2017=sum(one[date>=as.Date('2017-01-01') & date<=as.Date('2017-12-31')]),
         n_2018=sum(one[date>=as.Date('2018-01-01') & date<=as.Date('2018-12-31')]),
         
         # election 13 - election 18
        
         n_ge13toge18=sum(one[date>as.Date('2013-02-25') & date<=as.Date('2018-03-04')]),
        
         # for placebos: (1) 3months after referendum. (2) everything afterwards (3) monthly sum afterward

         n_postref_m0_m3=sum(one[date>as.Date('2016-12-05') & date<=as.Date('2017-03-05')]),
         n_postref_m3_end=sum(one[date>as.Date('2017-03-05')]),
         n_postref_m0_m12=sum(one[date>as.Date('2016-12-05') & date<=as.Date('2017-12-05')]),
        
         n_postref_m0_m1  =sum(one[date>as.Date('2016-12-05') & date<=as.Date('2017-01-05')]),
         n_postref_m1_m2  =sum(one[date>as.Date('2017-01-05') & date<=as.Date('2017-02-05')]),
         n_postref_m2_m3  =sum(one[date>as.Date('2017-02-05') & date<=as.Date('2017-03-05')]),
         n_postref_m3_m4  =sum(one[date>as.Date('2017-03-05') & date<=as.Date('2017-04-05')]),
         n_postref_m4_m5  =sum(one[date>as.Date('2017-04-05') & date<=as.Date('2017-05-05')]),
         n_postref_m5_m6  =sum(one[date>as.Date('2017-05-05') & date<=as.Date('2017-06-05')]),
         n_postref_m6_m7  =sum(one[date>as.Date('2017-06-05') & date<=as.Date('2017-07-05')]),
         n_postref_m7_m8  =sum(one[date>as.Date('2017-07-05') & date<=as.Date('2017-08-05')]),
         n_postref_m8_m9  =sum(one[date>as.Date('2017-08-05') & date<=as.Date('2017-09-05')]),
         n_postref_m9_m10 =sum(one[date>as.Date('2017-09-05') & date<=as.Date('2017-10-05')]),
         n_postref_m10_m11=sum(one[date>as.Date('2017-10-05') & date<=as.Date('2017-11-05')]),
         n_postref_m11_m12=sum(one[date>as.Date('2017-11-05') & date<=as.Date('2017-12-05')]),
        


         # same variables, but weighted by rsvp
         # NOTE(review): sums without na.rm become NA for a comune if any of
         # its events has a missing yes_rsvp_count -- confirm the column is complete
         wn_total=sum((one*yes_rsvp_count)),
         # nr of events by year(group)
         wn_startto09=sum((one*yes_rsvp_count)[date<=as.Date('2008-12-31')]),
         wn_09to11=sum((one*yes_rsvp_count)[date>=as.Date('2009-01-01') & date<=as.Date('2011-12-31')]),
         wn_12to15=sum((one*yes_rsvp_count)[date>=as.Date('2012-01-01') & date<=as.Date('2015-12-31')]),
         wn_startto16=sum((one*yes_rsvp_count)[date<=as.Date('2015-12-31')]),
         wn_16=sum((one*yes_rsvp_count)[date>=as.Date('2016-01-01') & date<=as.Date('2016-12-31')]),
         wn_16toend=sum((one*yes_rsvp_count)[date>as.Date('2015-12-31')]),

         # time spans of equal length
         # full itanes panel length
         wn_treat_157d=sum((one*yes_rsvp_count)[date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')]),
         wn_pre_157d=sum((one*yes_rsvp_count)[date>as.Date('2015-12-27') & date<as.Date('2016-06-01')]),
         # x months treat
         wn_treat_24m=sum((one*yes_rsvp_count)[date>as.Date('2014-12-04') & date<=as.Date('2016-12-04')]),
         wn_treat_12m=sum((one*yes_rsvp_count)[date>as.Date('2015-12-04') & date<=as.Date('2016-12-04')]),
         wn_treat_6m=sum((one*yes_rsvp_count)[date>as.Date('2016-06-04') & date<=as.Date('2016-12-04')]),
         wn_treat_120d=sum((one*yes_rsvp_count)[date>as.Date('2016-08-06') & date<=as.Date('2016-12-04')]),
         wn_treat_90d=sum((one*yes_rsvp_count)[date>as.Date('2016-09-05') & date<=as.Date('2016-12-04')]),
         wn_treat_60d=sum((one*yes_rsvp_count)[date>as.Date('2016-10-05') & date<=as.Date('2016-12-04')]),
         wn_treat_30d=sum((one*yes_rsvp_count)[date>as.Date('2016-11-04') & date<=as.Date('2016-12-04')]),
         # campaign duration, starting jan 20 (renzi announces resignation in case of NO)
         wn_treat_campaign=sum((one*yes_rsvp_count)[date>as.Date('2016-01-20') & date<=as.Date('2016-12-04')]),
         # intense campaign final 3 months
         wn_treat_campaign_short=sum((one*yes_rsvp_count)[date>as.Date('2016-08-31') & date<=as.Date('2016-12-04')]),
         # everything before referendum
         wn_treat_referendum=sum((one*yes_rsvp_count)[date<=as.Date('2016-12-04')]),


         # x months pre
         wn_pre_120d=sum((one*yes_rsvp_count)[date>as.Date('2016-02-01') & date<as.Date('2016-06-01')]),
         wn_pre_90d=sum((one*yes_rsvp_count)[date>as.Date('2016-03-02') & date<as.Date('2016-06-01')]),
         wn_pre_60d=sum((one*yes_rsvp_count)[date>as.Date('2016-04-01') & date<as.Date('2016-06-01')]),
         wn_pre_30d=sum((one*yes_rsvp_count)[date>as.Date('2016-05-01') & date<as.Date('2016-06-01')]),
         # pre-campaign
         wn_pre_campaign=sum((one*yes_rsvp_count)[date<as.Date('2016-01-20')]),

         # events with reference to renzi or referendum
         wn_total_renzi=sum(renzievent*yes_rsvp_count),
         wn_total_ref=sum(refevent*yes_rsvp_count),

         # nr of events in treatment period with reference to either referendum or renzi
         wn_treat_ref=sum( (refevent*yes_rsvp_count)[date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')]),
         wn_treat_renzi=sum( (renzievent*yes_rsvp_count) [date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')]),
         
         # post referendum
         wn_posttreat=sum((one*yes_rsvp_count)[date>as.Date('2016-12-04')]),
         
         # events by location
         wn_total_loc_outdoor=sum(loc_outdoor*yes_rsvp_count, na.rm=TRUE),
         wn_total_loc_indoor=sum(loc_indoor*yes_rsvp_count, na.rm=TRUE),
         wn_total_loc_undefined=sum(loc_undefined*yes_rsvp_count, na.rm=TRUE),
         wn_total_loc_missing=sum(loc_missing*yes_rsvp_count, na.rm=TRUE),

         # nr of events in treatment period by location
         wn_treat_loc_outdoor=sum( (loc_outdoor*yes_rsvp_count) [date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         wn_treat_loc_indoor=sum( (loc_indoor*yes_rsvp_count) [date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         wn_treat_loc_undefined=sum( (loc_undefined*yes_rsvp_count) [date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         wn_treat_loc_missing=sum( (loc_missing*yes_rsvp_count) [date>as.Date('2016-07-01') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         
         # itanes length by location
         wn_treat_loc_outdoor_157d=sum( (loc_outdoor*yes_rsvp_count) [date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         wn_treat_loc_indoor_157d=sum( (loc_indoor*yes_rsvp_count) [date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         wn_treat_loc_undefined_157d=sum( (loc_undefined*yes_rsvp_count) [date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         wn_treat_loc_missing_157d=sum( (loc_missing*yes_rsvp_count) [date>as.Date('2016-06-30') & date<=as.Date('2016-12-04')], na.rm=TRUE),
         
         wn_pre_loc_outdoor_157d=sum( (loc_outdoor*yes_rsvp_count) [date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),
         wn_pre_loc_indoor_157d=sum( (loc_indoor*yes_rsvp_count) [date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),
         wn_pre_loc_undefined_157d=sum( (loc_undefined*yes_rsvp_count) [date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),
         wn_pre_loc_missing_157d=sum( (loc_missing*yes_rsvp_count) [date>as.Date('2015-12-27') & date<as.Date('2016-06-01')], na.rm=TRUE),

         # tscs data set: yearly number of events
         # (1 January included: date >= start of year)
         wn_2005=sum((one*yes_rsvp_count)[date>=as.Date('2005-01-01') & date<=as.Date('2005-12-31')]),
         wn_2006=sum((one*yes_rsvp_count)[date>=as.Date('2006-01-01') & date<=as.Date('2006-12-31')]),
         wn_2007=sum((one*yes_rsvp_count)[date>=as.Date('2007-01-01') & date<=as.Date('2007-12-31')]),
         wn_2008=sum((one*yes_rsvp_count)[date>=as.Date('2008-01-01') & date<=as.Date('2008-12-31')]),
         wn_2009=sum((one*yes_rsvp_count)[date>=as.Date('2009-01-01') & date<=as.Date('2009-12-31')]),
         wn_2010=sum((one*yes_rsvp_count)[date>=as.Date('2010-01-01') & date<=as.Date('2010-12-31')]),
         wn_2011=sum((one*yes_rsvp_count)[date>=as.Date('2011-01-01') & date<=as.Date('2011-12-31')]),
         wn_2012=sum((one*yes_rsvp_count)[date>=as.Date('2012-01-01') & date<=as.Date('2012-12-31')]),
         wn_2013=sum((one*yes_rsvp_count)[date>=as.Date('2013-01-01') & date<=as.Date('2013-12-31')]),
         wn_2014=sum((one*yes_rsvp_count)[date>=as.Date('2014-01-01') & date<=as.Date('2014-12-31')]),
         wn_2015=sum((one*yes_rsvp_count)[date>=as.Date('2015-01-01') & date<=as.Date('2015-12-31')]),
         wn_2016=sum((one*yes_rsvp_count)[date>=as.Date('2016-01-01') & date<=as.Date('2016-12-31')]),
         wn_2017=sum((one*yes_rsvp_count)[date>=as.Date('2017-01-01') & date<=as.Date('2017-12-31')]),
         wn_2018=sum((one*yes_rsvp_count)[date>=as.Date('2018-01-01') & date<=as.Date('2018-12-31')]),
         
         # election 13 - election 18
        
         wn_ge13toge18=sum((one*yes_rsvp_count)[date>as.Date('2013-02-25') & date<=as.Date('2018-03-04')]),
        
         # for placebos: (1) 3months after referendum. (2) everything afterwards (3) monthly sum afterward

         wn_postref_m0_m3=sum((one*yes_rsvp_count)[date>as.Date('2016-12-05') & date<=as.Date('2017-03-05')]),
         wn_postref_m3_end=sum((one*yes_rsvp_count)[date>as.Date('2017-03-05')]),
         wn_postref_m0_m12=sum((one*yes_rsvp_count)[date>as.Date('2016-12-05') & date<=as.Date('2017-12-05')]),
        
         wn_postref_m0_m1  =sum((one*yes_rsvp_count)[date>as.Date('2016-12-05') & date<=as.Date('2017-01-05')]),
         wn_postref_m1_m2  =sum((one*yes_rsvp_count)[date>as.Date('2017-01-05') & date<=as.Date('2017-02-05')]),
         wn_postref_m2_m3  =sum((one*yes_rsvp_count)[date>as.Date('2017-02-05') & date<=as.Date('2017-03-05')]),
         wn_postref_m3_m4  =sum((one*yes_rsvp_count)[date>as.Date('2017-03-05') & date<=as.Date('2017-04-05')]),
         wn_postref_m4_m5  =sum((one*yes_rsvp_count)[date>as.Date('2017-04-05') & date<=as.Date('2017-05-05')]),
         wn_postref_m5_m6  =sum((one*yes_rsvp_count)[date>as.Date('2017-05-05') & date<=as.Date('2017-06-05')]),
         wn_postref_m6_m7  =sum((one*yes_rsvp_count)[date>as.Date('2017-06-05') & date<=as.Date('2017-07-05')]),
         wn_postref_m7_m8  =sum((one*yes_rsvp_count)[date>as.Date('2017-07-05') & date<=as.Date('2017-08-05')]),
         wn_postref_m8_m9  =sum((one*yes_rsvp_count)[date>as.Date('2017-08-05') & date<=as.Date('2017-09-05')]),
         wn_postref_m9_m10 =sum((one*yes_rsvp_count)[date>as.Date('2017-09-05') & date<=as.Date('2017-10-05')]),
         wn_postref_m10_m11=sum((one*yes_rsvp_count)[date>as.Date('2017-10-05') & date<=as.Date('2017-11-05')]),
         wn_postref_m11_m12=sum((one*yes_rsvp_count)[date>as.Date('2017-11-05') & date<=as.Date('2017-12-05')])
         
         ) %>%
  # transform to comune-level data: keep the first row of every comune
  filter(row_number() == 1) %>%
  ungroup()



# add history variables by comune (when were groups active?)

nr_com$hist_n_early <- nr_com$n_startto09
nr_com$hist_n_mid <- nr_com$n_09to11
nr_com$hist_n_late <- nr_com$n_12to15

nr_com$hist_wn_early <- nr_com$wn_startto09
nr_com$hist_wn_mid <- nr_com$wn_09to11
nr_com$hist_wn_late <- nr_com$wn_12to15

# history of group: days between the comune's first event and the referendum
#
# The reference day is passed as a Date and the unit is fixed to days.
# Passing the bare string "2016-12-04" makes difftime() parse it in the local
# time zone (while Date values are treated as UTC), and the default
# units = "auto" switches to hours or seconds as soon as one comune has its
# first event within a day of the referendum, silently rescaling every value.

referendum_day <- as.Date("2016-12-04")
nr_com$hist_days <- as.numeric(difftime(referendum_day, nr_com$first_event, units = "days"))
# groups that were founded after referendum get a history of 0 days
nr_com$hist_days <- pmax(nr_com$hist_days, 0)
hist(nr_com$hist_days, breaks = 100) # visual check of the distribution

# add population weights (values of january 1st, 2017 to match number of comuni)

pop <- read.csv("./../../data_original/comune info/DCIS_POPRES1_18112019185956659.csv", stringsAsFactors = FALSE)

# keep id, name and population value under the names used in this project,
# and drop the row for "Italia"
pop <- pop %>%
  dplyr::select(comune_id_leading0 = ITTER107, comune_name = Territorio, pop = Value) %>%
  dplyr::filter(comune_name != "Italia")

# numeric id for merging (the original code is kept in comune_id_leading0)
pop$comune_id <- as.numeric(as.character(pop$comune_id_leading0))

# merge: full outer join on comune_id, so comuni without events and comuni
# without a population record are both kept
nr_com_pop <- merge(x = nr_com, y = pop, by = "comune_id", all.x = TRUE, all.y = TRUE)

# write
write.csv(nr_com_pop, "./../../data_coded/events_comune.csv")

